Chapter 13 Source of functional differences
# A tibble: 3 × 2
type_river count
<chr> <int>
1 core 25
2 endemic 7
3 marginal 11
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
13.1 Taxonomic variation
# Mean relative abundance accounted for by each genome `type`, averaged over
# samples.
genome_counts_filt %>%
  # Total-sum scaling (TSS): divide every sample column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # Long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  # Drop zero counts.
  # NOTE(review): a sample in which a type has no detected genome then yields
  # no (type, sample) row, so it is left out of that type's mean instead of
  # contributing 0 -- confirm this is the intended definition.
  filter(count > 0) %>%
  left_join(core_microbiota, by = "genome") %>%
  group_by(type, sample) %>%
  summarise(fraction = sum(count)) %>% # summed relative abundance per type and sample
  group_by(type) %>%
  summarise(mean(fraction))
`summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
# A tibble: 3 × 2
type `mean(fraction)`
<chr> <dbl>
1 core 0.901
2 endemic 0.132
3 marginal 0.0874
13.2 Genome size
# A tibble: 3 × 2
type genome_size
<chr> <dbl>
1 core 3337341.
2 endemic 3753149.
3 marginal 3281875.
# A tibble: 3 × 9
.y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
* <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 length core endemic 25 7 69 0.42 0.973 ns
2 length core marginal 25 11 139 0.973 0.973 ns
3 length endemic marginal 7 11 44 0.659 0.973 ns
## Functional differences between fractions
# Per-genome metabolic capacity index (`mci`), labelled with the genome's
# `type` from core_microbiota. Built once and reused by the plot and the test
# below, which previously each repeated the whole pipeline.
mci_by_type <- genome_gifts %>%
  to.elements(GIFT_db) %>%
  to.functions(GIFT_db) %>%
  to.domains(GIFT_db) %>%
  as.data.frame() %>%
  # mci = mean of the Biosynthesis and Degradation domain columns
  mutate(mci = (Biosynthesis + Degradation) / 2) %>%
  rownames_to_column(var = "genome") %>%
  select(genome, mci) %>%
  left_join(core_microbiota %>% select(genome, type), by = "genome") %>%
  pivot_longer(!c(genome, type), names_to = "trait", values_to = "value") %>%
  # Drop genomes left without a type after the join
  filter(!is.na(type))

# Boxplot of the index per type
mci_by_type %>%
  ggplot(aes(x = type, y = value, group = type)) +
  geom_boxplot() +
  facet_grid(~trait, scales = "free")

# Pairwise Wilcoxon tests between types; BH adjustment is applied within trait
mci_by_type %>%
  group_by(trait) %>%
  pairwise_wilcox_test(value ~ type, p.adjust.method = "BH")
# A tibble: 3 × 10
trait .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 mci value core endemic 23 7 72 0.701 0.701 ns
2 mci value core marginal 23 10 177 0.014 0.042 *
3 mci value endemic marginal 7 10 53 0.088 0.132 ns
# Pairwise Wilcoxon tests between genome types for a fixed set of
# function-level traits.
# NOTE(review): BH adjustment runs inside each trait (three comparisons), not
# across the traits tested -- confirm that is the intended correction.
genome_gifts %>%
  to.elements(GIFT_db) %>%
  to.functions(GIFT_db) %>%
  as.data.frame() %>%
  rownames_to_column("genome") %>%
  left_join(select(core_microbiota, genome, type), by = "genome") %>%
  # Genomes without a type are dropped before reshaping
  filter(!is.na(type)) %>%
  pivot_longer(
    -c(genome, type),
    names_to = "trait",
    values_to = "value"
  ) %>%
  # Restrict to the traits of interest
  filter(
    trait %in% c(
      "B01", "B02", "B03", "B04", "B06", "B07", "B08",
      "D01", "D02", "D03", "D05", "D06", "D07", "D09"
    )
  ) %>%
  group_by(trait) %>%
  pairwise_wilcox_test(value ~ type, p.adjust.method = "BH") %>%
  print(n = 100) # show every row instead of the default truncated tibble print
# A tibble: 42 × 10
trait .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 B01 value core endemic 23 7 57.5 0.27 0.27 ns
2 B01 value core marginal 23 10 168. 0.038 0.057 ns
3 B01 value endemic marginal 7 10 61 0.01 0.029 *
4 B02 value core endemic 23 7 69 0.598 0.598 ns
5 B02 value core marginal 23 10 173 0.024 0.073 ns
6 B02 value endemic marginal 7 10 53 0.088 0.131 ns
7 B03 value core endemic 23 7 88 0.73 0.73 ns
8 B03 value core marginal 23 10 160. 0.08 0.24 ns
9 B03 value endemic marginal 7 10 44 0.399 0.599 ns
10 B04 value core endemic 23 7 92.5 0.572 0.623 ns
11 B04 value core marginal 23 10 150. 0.182 0.546 ns
12 B04 value endemic marginal 7 10 40.5 0.623 0.623 ns
13 B06 value core endemic 23 7 79 0.961 0.961 ns
14 B06 value core marginal 23 10 153 0.142 0.256 ns
15 B06 value endemic marginal 7 10 49.5 0.171 0.256 ns
16 B07 value core endemic 23 7 65 0.471 0.471 ns
17 B07 value core marginal 23 10 174 0.022 0.042 *
18 B07 value endemic marginal 7 10 58 0.028 0.042 *
19 B08 value core endemic 23 7 54 0.202 0.202 ns
20 B08 value core marginal 23 10 171 0.029 0.044 *
21 B08 value endemic marginal 7 10 62 0.01 0.029 *
22 D01 value core endemic 23 7 98.5 0.39 0.585 ns
23 D01 value core marginal 23 10 172. 0.028 0.084 ns
24 D01 value endemic marginal 7 10 39 0.73 0.73 ns
25 D02 value core endemic 23 7 66.5 0.508 0.508 ns
26 D02 value core marginal 23 10 168. 0.038 0.113 ns
27 D02 value endemic marginal 7 10 53 0.086 0.13 ns
28 D03 value core endemic 23 7 84 0.883 0.883 ns
29 D03 value core marginal 23 10 182. 0.01 0.029 *
30 D03 value endemic marginal 7 10 56.5 0.038 0.057 ns
31 D05 value core endemic 23 7 90 0.666 0.666 ns
32 D05 value core marginal 23 10 169 0.034 0.104 ns
33 D05 value endemic marginal 7 10 46 0.315 0.473 ns
34 D06 value core endemic 23 7 64.5 0.447 0.447 ns
35 D06 value core marginal 23 10 163 0.062 0.092 ns
36 D06 value endemic marginal 7 10 57 0.034 0.092 ns
37 D07 value core endemic 23 7 80.5 1 1 ns
38 D07 value core marginal 23 10 180. 0.012 0.036 *
39 D07 value endemic marginal 7 10 51.5 0.11 0.165 ns
40 D09 value core endemic 23 7 93 0.537 0.806 ns
41 D09 value core marginal 23 10 136. 0.387 0.806 ns
42 D09 value endemic marginal 7 10 35 1 1 ns
# Boxplots of function-level trait values per genome type, one facet per trait.
# NOTE(review): no trait subset is applied here, so every column returned by
# to.functions() gets a facet, whereas the Wilcoxon tests are restricted to 14
# traits -- confirm this difference is intended.
genome_gifts %>%
  to.elements(GIFT_db) %>%
  to.functions(GIFT_db) %>%
  as.data.frame() %>%
  rownames_to_column("genome") %>%
  left_join(select(core_microbiota, genome, type), by = "genome") %>%
  # Genomes without a type are dropped before reshaping
  filter(!is.na(type)) %>%
  pivot_longer(
    -c(genome, type),
    names_to = "trait",
    values_to = "value"
  ) %>%
  ggplot(aes(x = type, y = value, group = type)) +
  geom_boxplot() +
  facet_grid(~trait, scales = "free")

13.3 Functional differences between high and low endemisms
# Per-genome metabolic capacity index (`mci`) for endemic genomes only,
# labelled with `type_prevalence_environment`. Built once and reused by the
# plot and the test below, which previously each repeated the whole pipeline.
endemic_mci <- genome_gifts %>%
  to.elements(GIFT_db) %>%
  to.functions(GIFT_db) %>%
  to.domains(GIFT_db) %>%
  as.data.frame() %>%
  # mci = mean of the Biosynthesis and Degradation domain columns
  mutate(mci = (Biosynthesis + Degradation) / 2) %>%
  rownames_to_column(var = "genome") %>%
  select(genome, mci) %>%
  left_join(
    core_microbiota %>% select(genome, type_prevalence_environment, type),
    by = "genome"
  ) %>%
  filter(type == "endemic") %>% # keep endemic genomes only
  select(-type) %>%
  pivot_longer(
    !c(genome, type_prevalence_environment),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(!is.na(type_prevalence_environment))

# Boxplot of the index per prevalence class
endemic_mci %>%
  ggplot(aes(
    x = type_prevalence_environment,
    y = value,
    group = type_prevalence_environment
  )) +
  geom_boxplot() +
  facet_grid(~trait, scales = "free")

# Wilcoxon test between prevalence classes; BH adjustment is applied within
# trait.
# NOTE(review): the printed result shows group sizes of 5 and 2, which leaves
# very little power -- interpret "ns" with caution.
endemic_mci %>%
  group_by(trait) %>%
  pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH")
# A tibble: 1 × 10
trait .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 mci value high low 5 2 2 0.381 0.381 ns
# Function-level traits compared between prevalence classes (plot and test).
# Defined once here; previously the vector was written out in both pipelines.
focal_functions <- c(
  "B01", "B02", "B03", "B04", "B06", "B07", "B08",
  "D01", "D02", "D03", "D05", "D06", "D07", "D09"
)

# Function-level trait values for endemic genomes only, in long format and
# labelled with `type_prevalence_environment`. Built once and reused by the
# plot and the test below, which previously each repeated the whole pipeline.
endemic_functions <- genome_gifts %>%
  to.elements(GIFT_db) %>%
  to.functions(GIFT_db) %>%
  as.data.frame() %>%
  rownames_to_column(var = "genome") %>%
  left_join(
    core_microbiota %>% select(genome, type_prevalence_environment, type),
    by = "genome"
  ) %>%
  filter(type == "endemic") %>% # keep endemic genomes only
  select(-type) %>%
  pivot_longer(
    !c(genome, type_prevalence_environment),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(
    !is.na(type_prevalence_environment),
    trait %in% focal_functions
  )

# Boxplots per prevalence class, one facet per trait
endemic_functions %>%
  ggplot(aes(
    x = type_prevalence_environment,
    y = value,
    group = type_prevalence_environment
  )) +
  geom_boxplot() +
  facet_grid(~trait, scales = "free")

# Wilcoxon test per trait between prevalence classes.
# NOTE(review): BH adjustment runs inside each trait; with a single pair of
# classes per trait the adjusted p-values equal the raw ones (see the printed
# result), so nothing corrects for testing 14 traits -- confirm this is intended.
endemic_functions %>%
  group_by(trait) %>%
  pairwise_wilcox_test(value ~ type_prevalence_environment, p.adjust.method = "BH") %>%
  print(n = 100) # show every row instead of the default truncated tibble print
# A tibble: 14 × 10
trait .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
1 B01 value high low 5 2 2 0.381 0.381 ns
2 B02 value high low 5 2 2 0.381 0.381 ns
3 B03 value high low 5 2 5 1 1 ns
4 B04 value high low 5 2 4 0.857 0.857 ns
5 B06 value high low 5 2 1.5 0.241 0.241 ns
6 B07 value high low 5 2 1 0.19 0.19 ns
7 B08 value high low 5 2 2 0.329 0.329 ns
8 D01 value high low 5 2 1 0.171 0.171 ns
9 D02 value high low 5 2 0 0.079 0.079 ns
10 D03 value high low 5 2 0 0.079 0.079 ns
11 D05 value high low 5 2 1 0.19 0.19 ns
12 D06 value high low 5 2 2 0.381 0.381 ns
13 D07 value high low 5 2 2 0.381 0.381 ns
14 D09 value high low 5 2 2 0.285 0.285 ns